# Load the basket-format CSV: one transaction per record, items separated by
# commas. The first line of the file is skipped (presumably a header row).
#
# The first field of every record is a running record number, not a product.
# Without `cols = 1` it is read as an ordinary item ("1", "2", ...), which adds
# one unique bogus item to every transaction and inflates the item universe.
# `cols = 1` tells read.transactions() to use that field as the transaction ID.
market_basket <- read.transactions(
  file = '../data/market_basket.csv',
  sep = ',',
  quote = "",
  format = 'basket', # each record in the file will be treated as transaction
  rm.duplicates = TRUE, # drop items repeated within the same record
  skip = 1,
  cols = 1
)
distribution of transactions with duplicates:
items
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 22
1029 473 266 159 83 61 52 32 16 15 10 11 4 3 2 1 5 1 1 1
23 25 27 34 52
2 1 1 1 1
# show the item sets of the first 5 transactions
inspect(head(market_basket, n = 5))
items
[1] {1,
MEDIUM CERAMIC TOP STORAGE JAR}
[2] {2,
3D DOG PICTURE PLAYING CARDS,
AIRLINE BAG VINTAGE JET SET BROWN,
ALARM CLOCK BAKELIKE CHOCOLATE,
ALARM CLOCK BAKELIKE GREEN,
ALARM CLOCK BAKELIKE ORANGE,
ALARM CLOCK BAKELIKE PINK,
ALARM CLOCK BAKELIKE RED,
BATHROOM METAL SIGN,
BLACK CANDELABRA T-LIGHT HOLDER,
BLACK EAR MUFF HEADPHONES,
BLACK GRAND BAROQUE PHOTO FRAME,
BLUE 3 PIECE POLKADOT CUTLERY SET,
BLUE DRAWER KNOB ACRYLIC EDWARDIAN,
BOOM BOX SPEAKER BOYS,
BOX OF 6 ASSORTED COLOUR TEASPOONS,
CAMOUFLAGE EAR MUFF HEADPHONES,
CLEAR DRAWER KNOB ACRYLIC EDWARDIAN,
COLOUR GLASS. STAR T-LIGHT HOLDER,
EMERGENCY FIRST AID TIN,
FOUR HOOK WHITE LOVEBIRDS,
GREEN DRAWER KNOB ACRYLIC EDWARDIAN,
LARGE HEART MEASURING SPOONS,
MINI PAINT SET VINTAGE,
PINK 3 PIECE POLKADOT CUTLERY SET,
PINK DRAWER KNOB ACRYLIC EDWARDIAN,
PURPLE DRAWERKNOB ACRYLIC EDWARDIAN,
RED 3 PIECE RETROSPOT CUTLERY SET,
RED DRAWER KNOB ACRYLIC EDWARDIAN,
RED TOADSTOOL LED NIGHT LIGHT,
SET OF 2 TINS VINTAGE BATHROOM,
SET/3 DECOUPAGE STACKING TINS}
[3] {3,
3D DOG PICTURE PLAYING CARDS,
60 TEATIME FAIRY CAKE CASES,
72 SWEETHEART FAIRY CAKE CASES,
AIRLINE BAG VINTAGE JET SET BROWN,
AIRLINE BAG VINTAGE JET SET WHITE,
ALARM CLOCK BAKELIKE CHOCOLATE,
ALARM CLOCK BAKELIKE GREEN,
ALARM CLOCK BAKELIKE ORANGE,
ALARM CLOCK BAKELIKE PINK,
ALARM CLOCK BAKELIKE RED,
BLACK CANDELABRA T-LIGHT HOLDER,
BLUE NEW BAROQUE CANDLESTICK CANDLE,
BOX OF 6 ASSORTED COLOUR TEASPOONS,
CHOCOLATE CALCULATOR,
MINI LADLE LOVE HEART RED,
PACK OF 60 MUSHROOM CAKE CASES,
PACK OF 60 SPACEBOY CAKE CASES,
PINK NEW BAROQUECANDLESTICK CANDLE,
RED RETROSPOT OVEN GLOVE,
RED RETROSPOT OVEN GLOVE DOUBLE,
RED TOADSTOOL LED NIGHT LIGHT,
REGENCY CAKESTAND 3 TIER,
SANDWICH BATH SPONGE,
SET OF 2 TINS VINTAGE BATHROOM,
SET/2 RED RETROSPOT TEA TOWELS,
SMALL HEART MEASURING SPOONS,
TEA TIME OVEN GLOVE,
TOOTHPASTE TUBE PEN,
WOODLAND CHARLOTTE BAG}
[4] {3D SHEET OF CAT STICKERS,
3D SHEET OF DOG STICKERS,
4,
AIRLINE BAG VINTAGE JET SET BROWN,
AIRLINE BAG VINTAGE JET SET RED,
AIRLINE BAG VINTAGE JET SET WHITE,
AIRLINE BAG VINTAGE TOKYO 78,
GIFT BAG PSYCHEDELIC APPLES,
HOLIDAY FUN LUDO,
ICE CREAM SUNDAE LIP GLOSS,
LARGE HEART MEASURING SPOONS,
MINI PAINT SET VINTAGE,
PACK OF 60 DINOSAUR CAKE CASES,
RED DRAWER KNOB ACRYLIC EDWARDIAN,
RED RETROSPOT OVEN GLOVE DOUBLE,
RED RETROSPOT PURSE,
RED TOADSTOOL LED NIGHT LIGHT,
REGENCY CAKESTAND 3 TIER,
ROSES REGENCY TEACUP AND SAUCER,
SET OF 2 TINS VINTAGE BATHROOM,
SMALL FOLDING SCISSOR(POINTED EDGE),
SMALL HEART MEASURING SPOONS,
TREASURE ISLAND BOOK BOX,
VINTAGE HEADS AND TAILS CARD GAME,
WATERING CAN PINK BUNNY}
[5] {3D DOG PICTURE PLAYING CARDS,
5,
AIRLINE BAG VINTAGE JET SET BROWN,
AIRLINE BAG VINTAGE TOKYO 78,
ALARM CLOCK BAKELIKE CHOCOLATE,
ALARM CLOCK BAKELIKE RED,
COAL BLACK,
FEATHER PEN,
NAMASTE SWAGAT INCENSE,
RABBIT NIGHT LIGHT,
REGENCY MILK JUG PINK,
REGENCY SUGAR BOWL GREEN,
REGENCY TEA PLATE GREEN,
REGENCY TEA PLATE PINK,
REGENCY TEA PLATE ROSES,
REGENCY TEA STRAINER,
REGENCY TEAPOT ROSES,
SMALL HEART MEASURING SPOONS,
TRIPLE HOOK ANTIQUE IVORY ROSE,
VICTORIAN SEWING KIT}
# horizontal bar chart of the 10 most frequently bought items,
# using absolute counts and one Spectral palette colour per bar
itemFrequencyPlot(
  market_basket,
  topN = 10,
  type = "absolute",
  col = brewer.pal(10, "Spectral"),
  horiz = TRUE
)

# mine association rules with minimum support 0.005 and minimum
# confidence 0.8, then order the resulting rules by confidence
rule1 <- market_basket %>%
  apriori(parameter = list(support = 0.005, confidence = 0.8)) %>%
  sort(by = "confidence")
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 92
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[22346 item(s), 18440 transaction(s)] done [0.14s].
sorting and recoding items ... [1257 item(s)] done [0.01s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 5 6 done [0.04s].
writing ... [561 rule(s)] done [0.00s].
creating S4 object ... done [0.01s].
# overview of the mined rules: rule lengths and quality measures
rule1 %>% summary()
set of 561 rules
rule length distribution (lhs + rhs):sizes
2 3 4 5 6
64 201 211 79 6
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 3.000 4.000 3.576 4.000 6.000
summary of quality measures:
support confidence coverage lift count
Min. :0.005043 Min. :0.8000 Min. :0.005152 Min. : 8.036 Min. : 93.0
1st Qu.:0.005477 1st Qu.:0.8362 1st Qu.:0.006345 1st Qu.: 25.003 1st Qu.:101.0
Median :0.006562 Median :0.8962 Median :0.007538 Median : 57.604 Median :121.0
Mean :0.007055 Mean :0.8936 Mean :0.007899 Mean : 58.506 Mean :130.1
3rd Qu.:0.007972 3rd Qu.:0.9412 3rd Qu.:0.008677 3rd Qu.: 91.274 3rd Qu.:147.0
Max. :0.024946 Max. :1.0000 Max. :0.030152 Max. :126.817 Max. :460.0
mining info:
# first 5 rules in the current ordering of rule1
inspect(head(rule1, n = 5))
# last 5 rules in the current ordering of rule1
inspect(tail(rule1, n = 5))
lhs rhs support confidence coverage lift count
[1] {SET OF 3 WOODEN HEART DECORATIONS,
SET OF 3 WOODEN SLEIGH DECORATIONS} => {SET OF 3 WOODEN STOCKING DECORATION} 0.006561822 0.8013245 0.008188720 53.92855 121
[2] {REGENCY MILK JUG PINK,
REGENCY SUGAR BOWL GREEN} => {REGENCY TEAPOT ROSES} 0.008947939 0.8009709 0.011171367 41.84108 165
[3] {PINK POLKADOT BOWL,
RED RETROSPOT BOWL} => {BLUE POLKADOT BOWL} 0.005422993 0.8000000 0.006778742 66.15247 100
[4] {SET OF 12 FAIRY CAKE BAKING CASES,
SET OF 6 SNACK LOAF BAKING CASES,
SET OF 6 TEA TIME BAKING CASES} => {SET OF 12 MINI LOAF BAKING CASES} 0.005856833 0.8000000 0.007321041 41.09192 108
[5] {LUNCH BAG APPLE DESIGN,
LUNCH BAG PINK POLKADOT,
LUNCH BAG WOODLAND} => {LUNCH BAG RED RETROSPOT} 0.006290672 0.8000000 0.007863341 11.45342 116
# re-order the rule set by lift and show the first 5 rules
rule1 <- sort(rule1, by = "lift")
inspect(head(rule1, n = 5))
lhs rhs support confidence coverage lift count
[1] {DOLLY GIRL CHILDRENS CUP,
SPACEBOY CHILDRENS BOWL} => {DOLLY GIRL CHILDRENS BOWL} 0.005206074 0.9696970 0.005368764 126.8171 96
[2] {DOLLY GIRL CHILDRENS BOWL} => {DOLLY GIRL CHILDRENS CUP} 0.006344902 0.8297872 0.007646421 106.2589 117
[3] {DOLLY GIRL CHILDRENS CUP} => {DOLLY GIRL CHILDRENS BOWL} 0.006344902 0.8125000 0.007809111 106.2589 117
[4] {DOLLY GIRL CHILDRENS BOWL,
SPACEBOY CHILDRENS BOWL} => {DOLLY GIRL CHILDRENS CUP} 0.005206074 0.8135593 0.006399132 104.1808 96
[5] {HERB MARKER BASIL,
HERB MARKER MINT,
HERB MARKER PARSLEY,
HERB MARKER ROSEMARY,
HERB MARKER THYME} => {HERB MARKER CHIVES} 0.007158351 0.9166667 0.007809111 101.8273 132
# interactive plot of all rules using the default plot method
plot(x = rule1, engine = "htmlwidget")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# interactive plot of all rules using the "two-key" method
plot(x = rule1, method = "two-key", engine = "htmlwidget")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# interactive graph view of the rules; with more rules than the plot's
# `max` control allows, only the best 100 by lift are drawn (with a warning)
plot(x = rule1, method = "graph", engine = "htmlwidget")
Warning: Too many rules supplied. Only plotting the best 100 using ‘lift’ (change control parameter max if needed).
LS0tCnRpdGxlOiAiTWFya2V0IEJhc2tldCBBbmFseXNpcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQptYXJrZXRfYmFza2V0IDwtIHJlYWQudHJhbnNhY3Rpb25zKAogIGZpbGUgPSAnLi4vZGF0YS9tYXJrZXRfYmFza2V0LmNzdicsCiAgc2VwID0gJywnLCAKICBxdW90ZSA9ICIiLAogIGZvcm1hdCA9ICdiYXNrZXQnLCAjIGVhY2ggcmVjb3JkIGluIHRoZSBmaWxlIHdpbGwgYmUgdHJlYXRlZCBhcyB0cmFuc2FjdGlvbgogIHJtLmR1cGxpY2F0ZXMgPSBUUlVFLAogIHNraXA9MQopCmBgYAoKCmBgYHtyfQojIGluc3BlY3RpbmcgZmlyc3QgNSB0cmFuc2FjdGlvbnMKbWFya2V0X2Jhc2tldCAlPiUgaGVhZChuPTUpICU+JSBpbnNwZWN0CmBgYAoKYGBge3J9CiMgcGxvdCBmb3IgMTAgbW9zdCBmcmVxdWVudGx5IGJyb3VnaHQgaXRlbXMKaXRlbUZyZXF1ZW5jeVBsb3QoeCA9IG1hcmtldF9iYXNrZXQsCiAgICAgICAgICAgICAgICAgIHRvcE4gPSAxMCwKICAgICAgICAgICAgICAgICAgdHlwZSA9ICdhYnNvbHV0ZScsCiAgICAgICAgICAgICAgICAgIGhvcml6ID0gVFJVRSwKICAgICAgICAgICAgICAgICAgY29sID0gYnJld2VyLnBhbCgxMCwnU3BlY3RyYWwnKSkKYGBgCgoKYGBge3J9CnJ1bGUxIDwtIG1hcmtldF9iYXNrZXQgJT4lIGFwcmlvcmkocGFyYW1ldGVyID0gbGlzdChzdXBwID0gMC4wMDUsIGNvbmY9MC44KSkgJT4lIHNvcnQoYnkgPSAnY29uZmlkZW5jZScpCmBgYAoKYGBge3J9CnN1bW1hcnkocnVsZTEpCmBgYAoKYGBge3J9CnJ1bGUxICU+JSBoZWFkKG49NSklPiUgaW5zcGVjdApgYGAKCmBgYHtyfQpydWxlMSAlPiUgdGFpbChuPTUpJT4lIGluc3BlY3QKYGBgCmBgYHtyfQpydWxlMSA8LSBydWxlMSAlPiUgc29ydChieT0nbGlmdCcpCnJ1bGUxICU+JSBoZWFkKG49NSklPiUgaW5zcGVjdApgYGAKCmBgYHtyfQpwbG90KHJ1bGUxLCBlbmdpbmU9J2h0bWx3aWRnZXQnKQpgYGAKCmBgYHtyfQpwbG90KHJ1bGUxLCBtZXRob2Q9J3R3by1rZXknLCBlbmdpbmU9J2h0bWx3aWRnZXQnKQpgYGAKCmBgYHtyfQpwbG90KHJ1bGUxLCBtZXRob2Q9J2dyYXBoJywgZW5naW5lPSdodG1sd2lkZ2V0JykKYGBgCgo=